import numpy as np
import pandas as pd
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from math import sqrt
from numpy import mean
from matplotlib import pyplot
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')
# Load quarterly private consumption (1990Q1-2020Q4, 124 obs) from a
# semicolon-separated CSV hosted on GitHub, then replace the parsed index
# with an explicit quarterly PeriodIndex converted to timestamps.
consumption = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/Privatforbrug%201990K1-2020K4.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','consumption'])
# Overwrite the CSV's time column with a clean quarterly range starting 1990Q1
consumption['Time']=pd.period_range('1990-01', periods=124, freq='Q')
consumption['consumption'] = pd.to_numeric(consumption['consumption'])
consumption=consumption.set_index('Time')
# PeriodIndex -> DatetimeIndex (quarter start timestamps) for plotting/statsmodels
consumption.index = consumption.index.to_timestamp()
# Load the remaining five quarterly national-accounts series the same way as
# consumption above: semicolon CSV from GitHub, explicit 124-quarter
# PeriodIndex from 1990Q1, numeric coercion, then a DatetimeIndex.
exports = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/Eksport%201990K1-2020K4.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','exports'])
exports['Time']=pd.period_range('1990-01', periods=124, freq='Q')
exports['exports'] = pd.to_numeric(exports['exports'])
exports=exports.set_index('Time')
exports.index = exports.index.to_timestamp()
imports = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/Import%201990K1-2020K4.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','imports'])
imports['Time']=pd.period_range('1990-01', periods=124, freq='Q')
imports['imports'] = pd.to_numeric(imports['imports'])
imports=imports.set_index('Time')
imports.index = imports.index.to_timestamp()
investments = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/Investment1990-2020.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','investments'])
investments['Time']=pd.period_range('1990-01', periods=124, freq='Q')
investments['investments'] = pd.to_numeric(investments['investments'])
investments=investments.set_index('Time')
investments.index = investments.index.to_timestamp()
government_expenditure = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/Public%20consumption1990-2020.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','government_expenditure'])
government_expenditure['Time']=pd.period_range('1990-01', periods=124, freq='Q')
government_expenditure['government_expenditure'] = pd.to_numeric(government_expenditure['government_expenditure'])
government_expenditure=government_expenditure.set_index('Time')
government_expenditure.index = government_expenditure.index.to_timestamp()
gdp = pd.read_csv('https://raw.githubusercontent.com/Louise198713/Speciale/main/GDP1990-2020.csv',
sep=';',
header=0,
index_col=0,
parse_dates=[0],
names=['Time','gdp'])
gdp['Time']=pd.period_range('1990-01', periods=124, freq='Q')
gdp['gdp'] = pd.to_numeric(gdp['gdp'])
gdp=gdp.set_index('Time')
gdp.index = gdp.index.to_timestamp()
# Overview figure: the six series in a 3x2 grid of line plots.
plt.rcParams["figure.figsize"] = (20,3)
%matplotlib inline
plt.style.use('fivethirtyeight')
import seaborn as sns
sns.set(rc={'figure.figsize':(15, 12)})
fig, axs = plt.subplots(3, 2, sharex=False, sharey=False, figsize=(18,12))
# NOTE(review): the plt.setp ylabel calls below are interleaved in shuffled
# order relative to the plots, but every axis ends up labelled 'Bill. DKK',
# so the result is the same as labelling each axis next to its plot.
axs[0, 0].plot(gdp.gdp)
axs[0, 0].set_title('GDP')
plt.setp(axs[0, 0], ylabel='Bill. DKK')
axs[0, 1].plot(consumption.consumption, 'tab:blue')
axs[0, 1].set_title('Consumption')
plt.setp(axs[1, 0], ylabel='Bill. DKK')
axs[1, 0].plot(imports.imports)
axs[1, 0].set_title('Imports')
plt.setp(axs[2, 0], ylabel='Bill. DKK')
axs[1, 1].plot(exports.exports)
axs[1, 1].set_title('Exports')
plt.setp(axs[2, 1], ylabel='Bill. DKK')
axs[2, 0].plot(investments.investments)
axs[2, 0].set_title('Investments')
plt.setp(axs[0, 1], ylabel='Bill. DKK')
axs[2, 1].plot(government_expenditure.government_expenditure)
axs[2, 1].set_title('Government expenditure')
plt.setp(axs[1, 1], ylabel='Bill. DKK')
# Hold out the final `nobs` quarters of every series as the test set;
# everything before that is the training set.
nobs = 6
imports_train, imports_test = imports.iloc[:-nobs], imports.iloc[-nobs:]
exports_train, exports_test = exports.iloc[:-nobs], exports.iloc[-nobs:]
investments_train, investments_test = investments.iloc[:-nobs], investments.iloc[-nobs:]
government_expenditure_train, government_expenditure_test = government_expenditure.iloc[:-nobs], government_expenditure.iloc[-nobs:]
gdp_train, gdp_test = gdp.iloc[:-nobs], gdp.iloc[-nobs:]
consumption_train, consumption_test = consumption.iloc[:-nobs], consumption.iloc[-nobs:]
# Sanity-check the split sizes
print(consumption_train.shape) # (118, 1)
print(consumption_test.shape) # (6, 1)
Baseline for Consumption
# Seasonal-naive baseline for Consumption: predict each quarter with the
# value observed 4 quarters (one year) earlier, walking forward one step
# at a time.
X = consumption.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # append the actual observation so the window advances
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
# (history still ends with the test values; only predictions from i >= 4
# are used below, and those come from the re-appended train values)
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations (no year-ago
# value available) with NaN. Use .iloc instead of chained indexing, which
# fails silently under pandas copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# Performance measures (first 4 train rows are NaN, hence the [4:] slices)
forecast_values_consumption_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
# Re-attach a quarterly time index starting 1991Q1 (124 - 4 = 120 periods)
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded
plt.figure(figsize=(15, 7.5))
plt.axvspan(consumption.index[-7], consumption.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
Baseline for Imports
# Seasonal-naive baseline for Imports (same walk-forward scheme as the
# consumption baseline above).
X = imports.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # observation
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations with NaN.
# .iloc avoids the chained-assignment write that breaks under pandas
# copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# report performance
forecast_values_imports_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded
plt.figure(figsize=(15, 7.5))
plt.axvspan(imports.index[-7], imports.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
Baseline for Export
# Seasonal-naive baseline for Exports (same walk-forward scheme as the
# consumption baseline above).
X = exports.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # observation
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations with NaN.
# .iloc avoids the chained-assignment write that breaks under pandas
# copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# report performance
forecast_values_exports_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded
plt.figure(figsize=(15, 7.5))
plt.axvspan(exports.index[-7], exports.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
Baseline for Investment
# Seasonal-naive baseline for Investments (same walk-forward scheme as the
# consumption baseline above).
X = investments.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # observation
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations with NaN.
# .iloc avoids the chained-assignment write that breaks under pandas
# copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# report performance
forecast_values_investments_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded
plt.figure(figsize=(15, 7.5))
plt.axvspan(investments.index[-7], investments.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
Baseline for Government Expenditures
# Seasonal-naive baseline for Government Expenditures (same walk-forward
# scheme as the consumption baseline above).
X = government_expenditure.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # observation
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations with NaN.
# .iloc avoids the chained-assignment write that breaks under pandas
# copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# report performance
forecast_values_gov_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded
plt.figure(figsize=(15, 7.5))
plt.axvspan(government_expenditure.index[-7], government_expenditure.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
Baseline for GDP
# Seasonal-naive baseline for GDP (same walk-forward scheme as the
# consumption baseline above).
X = gdp.values
train, test = X[0:-6], X[-6:]
# walk-forward validation for test predictions
history = [x for x in train]
predictions_test = list()
for i in range(len(test)):
    # make prediction: same quarter one year ago
    predictions_test.append(history[-4])
    # observation
    history.append(test[i])
# Converting to dataframe to be able to concatenate
predictions_test = pd.DataFrame(predictions_test)
# walk-forward validation for train predictions
predictions_train = list()
for i in range(len(train)):
    # make prediction
    predictions_train.append(history[-4])
    # observation
    history.append(train[i])
# Converting to dataframe and replacing first 4 observations with NaN.
# .iloc avoids the chained-assignment write that breaks under pandas
# copy-on-write.
predictions_train = pd.DataFrame(predictions_train)
predictions_train.iloc[:4, 0] = np.nan
# Concatenating predictions
predictions_total = pd.concat([predictions_train, predictions_test], ignore_index=True)
# report performance
forecast_values_gdp_baseline = predictions_test
rmse_test = sqrt(mean_squared_error(test, predictions_test))
rmse_train = sqrt(mean_squared_error(train[4:], predictions_train[4:]))
print('RMSE Train: %.3f' % rmse_train)
print('RMSE Test: %.3f' % rmse_test)
predictions = predictions_total[4:]
observations = pd.DataFrame(X[4:])
predictions['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
predictions = predictions.set_index('Time')
predictions.index = predictions.index.to_timestamp()
observations['Time'] = pd.period_range('1991-01', periods=120, freq='Q')
observations = observations.set_index('Time')
observations.index = observations.index.to_timestamp()
# line plot of observed vs predicted, test window shaded.
# Bug fix: the shaded span previously used government_expenditure.index
# (copy-paste from the previous cell); this is the GDP plot.
plt.figure(figsize=(15, 7.5))
plt.axvspan(gdp.index[-7], gdp.index[-1], alpha=0.5, color='lightgrey')
plt.plot(observations, color='r', label='actual')
plt.plot(predictions, label='prediction')
plt.legend()
plt.show()
!pip install pmdarima
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse
import pandas.util.testing as tm
Decomposition for Consumption
# Additive trend/seasonal/residual decomposition of each training series.
decomposition = seasonal_decompose(consumption_train.consumption, model='additive')
plt.rcParams.update({'figure.figsize': (10,10)})
decomposition.plot();
Decomposition for Imports
decomposition = seasonal_decompose(imports_train['imports'], model='additive')
plt.rcParams.update({'figure.figsize': (10,10)})
decomposition.plot();
Decomposition for Exports
decomposition = seasonal_decompose(exports_train['exports'], model='additive')
plt.rcParams.update({'figure.figsize': (10,10)})
decomposition.plot();
Decomposition for Investments
decomposition = seasonal_decompose(investments_train['investments'], model='additive')
plt.rcParams.update({'figure.figsize': (10,10)})
decomposition.plot();
Decomposition for Government Expenditures
decomposition = seasonal_decompose(government_expenditure_train['government_expenditure'], model='additive')
plt.rcParams.update({'figure.figsize': (10,10)})
decomposition.plot();
ACF for Consumption
# Autocorrelation plots (40 lags, 95% confidence bands) of the raw series.
import statsmodels.api as sm
plt.rcParams["figure.figsize"] = (8,6)
sm.graphics.tsa.plot_acf(consumption_train.consumption, lags=40, alpha = 0.05, title='ACF of Consumption');
ACF for Imports, Exports, Investments and Government Expenditures
fig, ax=plt.subplots(2,2, figsize=(18,10))
fig.suptitle('Autocorrelation of original series')
sm.graphics.tsa.plot_acf(imports_train.imports, lags=40, alpha = 0.05, ax=ax[0,0], title='ACF of Imports')
sm.graphics.tsa.plot_acf(exports_train.exports, lags=40, alpha = 0.05, ax=ax[0,1], title='ACF of Exports')
sm.graphics.tsa.plot_acf(investments_train.investments, lags=40, alpha = 0.05, ax=ax[1,0], title='ACF of Investments')
sm.graphics.tsa.plot_acf(government_expenditure_train.government_expenditure, lags=40, alpha = 0.05, ax=ax[1,1], title='ACF of Government Expenditure');
from statsmodels.tsa.stattools import adfuller
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Run an ADF unit-root test with constant and trend ('ct') and print a verdict.

    H0: the series has a unit root (non-stationary); rejected when the
    p-value is at or below `signif`. `name` and `verbose` are accepted for
    call compatibility but unused. Returns None (prints its summary).
    """
    r = adfuller(series, autolag='AIC', regression='ct')
    output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4),
              'n_lags': round(r[2], 4), 'n_obs': r[3]}
    p_value = output['pvalue']
    # Print Summary (dead inner helper `adjust` removed)
    print(f' p-value = {p_value}')
    if p_value <= signif:
        print(" Rejecting Null Hypothesis => Series is Stationary.")
    else:
        print(" Weak evidence to reject the Null Hypothesis => Series is non-stationary.")
# ADF tests on the original (undifferenced) training series.
# adfuller_test prints its own summary and returns None; the previous
# `print(adfuller_test(...))` wrappers emitted a stray 'None' line each.
print('ADF for original series')
print('Consumption:')
adfuller_test(consumption_train.consumption)
print('Imports:')
adfuller_test(imports_train.imports)
print('Exports:')
adfuller_test(exports_train.exports)
print('Investments:')
adfuller_test(investments_train.investments)
print('Government Expenditures:')
adfuller_test(government_expenditure_train.government_expenditure)
from statsmodels.tsa.stattools import kpss
def kpss_test(series, **kw):
    """Run a KPSS stationarity test and print a verdict.

    H0 for KPSS is stationarity (opposite of the ADF convention), so a
    p-value below 0.05 means the series is NOT stationary. Extra keyword
    arguments are forwarded to statsmodels' kpss. Returns None.
    """
    statistic, p_value, n_lags, critical_values = kpss(series, nlags='auto', **kw)
    # Dead inner helper `adjust` removed
    print(f'p-value = {p_value}')
    print(f'The series is {"not " if p_value < 0.05 else ""}stationary')
# KPSS tests on the original training series. kpss_test prints and
# returns None; dropping the print() wrappers removes the stray 'None'
# line each call used to emit.
print('KPSS for original series')
print('Consumption:')
kpss_test(consumption_train.consumption)
print('Imports:')
kpss_test(imports_train.imports)
print('Exports:')
kpss_test(exports_train.exports)
print('Investments:')
kpss_test(investments_train.investments)
print('Government Expenditures:')
kpss_test(government_expenditure_train.government_expenditure)
# Taking the first difference to remove the overall trend. The first row of
# each *_diff column is NaN (no previous value); downstream code calls
# .dropna() where needed. (The original `.dropna(inplace=False)` before
# assignment was a no-op: index alignment reintroduced the NaN.)
gdp_train['gdp_diff'] = gdp_train.gdp.diff()
# Bug fix: consumption_diff was previously computed from gdp_train.gdp,
# which made every downstream "consumption" stationarity test and plot
# actually analyze differenced GDP.
consumption_train['consumption_diff'] = consumption_train.consumption.diff()
imports_train['imports_diff'] = imports_train.imports.diff()
exports_train['exports_diff'] = exports_train.exports.diff()
investments_train['investments_diff'] = investments_train.investments.diff()
government_expenditure_train['government_expenditure_diff'] = government_expenditure_train.government_expenditure.diff()
Plots of Consumption
# Line plot and ACF of the first-differenced consumption series.
plt.rcParams["figure.figsize"] = (10,6)
plt.plot(consumption_train.consumption_diff)
plt.title('First differenced Consumption')
sm.graphics.tsa.plot_acf(consumption_train.consumption_diff.dropna(), lags=40, alpha = 0.05, title='ACF of Consumption');
Plots of Imports, Exports, Investments and Government Expenditures
# 2x2 grids: first-differenced series, then their ACFs (40 lags, 95% bands).
fig, axs = plt.subplots(2, 2, figsize=(18,10), sharex=False, sharey=False)
fig.suptitle('First differenced series')
axs[0, 0].plot(imports_train.imports_diff)
axs[0, 0].set_title('Imports')
axs[0, 1].plot(exports_train.exports_diff)
axs[0, 1].set_title('Exports')
axs[1, 0].plot(investments_train.investments_diff)
axs[1, 0].set_title('Investments')
axs[1, 1].plot(government_expenditure_train.government_expenditure_diff)
axs[1, 1].set_title('Government expenditure')
fig, ax=plt.subplots(2,2, figsize=(18,10))
fig.suptitle('Autocorrelation of first differenced series')
sm.graphics.tsa.plot_acf(imports_train.imports_diff.dropna(), lags=40, alpha = 0.05, ax=ax[0,0], title='ACF of Imports')
sm.graphics.tsa.plot_acf(exports_train.exports_diff.dropna(), lags=40, alpha = 0.05, ax=ax[0,1], title='ACF of Exports')
sm.graphics.tsa.plot_acf(investments_train.investments_diff.dropna(), lags=40, alpha = 0.05, ax=ax[1,0], title='ACF of Investments')
sm.graphics.tsa.plot_acf(government_expenditure_train.government_expenditure_diff.dropna(), lags=40, alpha = 0.05, ax=ax[1,1], title='ACF of Government Expenditure');
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Redefine the ADF test for differenced series: constant only ('c').

    H0: unit root (non-stationary); rejected when p-value <= `signif`.
    `name` and `verbose` are unused; returns None (prints its summary).
    """
    r = adfuller(series, autolag='AIC', regression='c')  # constant only
    output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4),
              'n_lags': round(r[2], 4), 'n_obs': r[3]}
    p_value = output['pvalue']
    # Dead inner helper `adjust` removed
    print(f' p-value = {p_value}')
    if p_value <= signif:
        print(" Rejecting Null Hypothesis => Series is Stationary.")
    else:
        print(" Weak evidence to reject the Null Hypothesis => Series is non-stationary.")
# ADF and KPSS tests on the first-differenced training series. Both helper
# functions print their own summaries and return None; the previous
# `print(...)` wrappers emitted a stray 'None' line per call.
print('ADF for first differenced series')
print('Consumption:')
adfuller_test(consumption_train.consumption_diff.dropna())
print('Imports:')
adfuller_test(imports_train.imports_diff.dropna())
print('Exports:')
adfuller_test(exports_train.exports_diff.dropna())
print('Investments:')
adfuller_test(investments_train.investments_diff.dropna())
print('government expenditure:')
adfuller_test(government_expenditure_train.government_expenditure_diff.dropna())
print('KPSS for first differenced series')
print('Consumption:')
kpss_test(consumption_train.consumption_diff.dropna())
print('Imports:')
kpss_test(imports_train.imports_diff.dropna())
print('Exports:')
kpss_test(exports_train.exports_diff.dropna())
print('Investments:')
kpss_test(investments_train.investments_diff.dropna())
print('government expenditure:')
kpss_test(government_expenditure_train.government_expenditure_diff.dropna())
# Apply a seasonal difference (lag 4 = one year of quarters) on top of the
# first difference. The leading NaNs dropped here are reintroduced by index
# alignment on assignment, so the *_season columns start with 5 NaNs and
# downstream code calls .dropna() before using them.
consumption_train['consumption_season'] = consumption_train.consumption_diff.diff(4).dropna()
imports_train['imports_season'] = imports_train.imports_diff.diff(4).dropna()
exports_train['exports_season'] = exports_train.exports_diff.diff(4).dropna()
investments_train['investments_season'] = investments_train.investments_diff.diff(4).dropna()
government_expenditure_train['government_expenditure_season'] = government_expenditure_train.government_expenditure_diff.diff(4).dropna()
Plots of Consumption
# Line plot and ACF of the first+seasonally differenced consumption series.
plt.rcParams["figure.figsize"] = (10,6)
plt.plot(consumption_train.consumption_season)
sm.graphics.tsa.plot_acf(consumption_train.consumption_season.dropna(), lags=40, alpha = 0.05, title='ACF of consumption');
Plots of Imports, Exports, Investments and Government Expenditures
# 2x2 grids: first+seasonally differenced series, then their ACFs.
fig, axs = plt.subplots(2, 2, figsize=(18,10), sharex=False, sharey=False)
fig.suptitle('First and seasonality differenced series')
axs[0, 0].plot(imports_train.imports_season)
axs[0, 0].set_title('Imports')
axs[0, 1].plot(exports_train.exports_season)
axs[0, 1].set_title('Exports')
axs[1, 0].plot(investments_train.investments_season)
axs[1, 0].set_title('Investments')
axs[1, 1].plot(government_expenditure_train.government_expenditure_season)
axs[1, 1].set_title('government expenditure')
fig, ax=plt.subplots(2,2, figsize=(18,10))
fig.suptitle('Autocorrelation of first and seasonality differenced series')
sm.graphics.tsa.plot_acf(imports_train.imports_season.dropna(), lags=40, alpha = 0.05, ax=ax[0,0], title='ACF of Imports')
sm.graphics.tsa.plot_acf(exports_train.exports_season.dropna(), lags=40, alpha = 0.05, ax=ax[0,1], title='ACF of Exports')
sm.graphics.tsa.plot_acf(investments_train.investments_season.dropna(), lags=40, alpha = 0.05, ax=ax[1,0], title='ACF of Investments')
sm.graphics.tsa.plot_acf(government_expenditure_train.government_expenditure_season.dropna(), lags=40, alpha = 0.05, ax=ax[1,1], title='ACF of Government Expenditure');
def adfuller_test(series, signif=0.05, name='', verbose=False):
    """Redefine the ADF test for doubly differenced series: no constant, no trend.

    Uses regression='n'; statsmodels deprecated and then removed the old
    'nc' spelling (gone in statsmodels 0.14), so 'nc' would raise there.
    H0: unit root; rejected when p-value <= `signif`. Returns None.
    """
    r = adfuller(series, autolag='AIC', regression='n')
    output = {'test_statistic': round(r[0], 4), 'pvalue': round(r[1], 4),
              'n_lags': round(r[2], 4), 'n_obs': r[3]}
    p_value = output['pvalue']
    # Dead inner helper `adjust` removed
    print(f' p-value = {p_value}')
    if p_value <= signif:
        print(" Rejecting Null Hypothesis => Series is Stationary.")
    else:
        print(" Weak evidence to reject the Null Hypothesis => Series is non-stationary.")
# ADF and KPSS tests on the first+seasonally differenced training series.
# Dropped the print() wrappers around the None-returning helpers, and fixed
# the KPSS heading, which wrongly said 'first differenced series'.
print('ADF for first and seasonality differenced series')
print('Consumption:')
adfuller_test(consumption_train.consumption_season.dropna())
print('Imports:')
adfuller_test(imports_train.imports_season.dropna())
print('Exports:')
adfuller_test(exports_train.exports_season.dropna())
print('Investments:')
adfuller_test(investments_train.investments_season.dropna())
print('government expenditure:')
adfuller_test(government_expenditure_train.government_expenditure_season.dropna())
print('KPSS for first and seasonality differenced series')
print('Consumption:')
kpss_test(consumption_train.consumption_season.dropna())
print('Imports:')
kpss_test(imports_train.imports_season.dropna())
print('Exports:')
kpss_test(exports_train.exports_season.dropna())
print('Investments:')
kpss_test(investments_train.investments_season.dropna())
print('government expenditure:')
kpss_test(government_expenditure_train.government_expenditure_season.dropna())
Plots for Consumption
# ACF/PACF pairs of the stationary (first+seasonally differenced) series,
# used to pick candidate SARIMA orders.
fig, ax = plt.subplots(1,2,figsize=(16,4))
sm.graphics.tsa.plot_acf(consumption_train.consumption_season.dropna(), lags=40, alpha = 0.05, ax=ax[0], title= 'ACF of Consumption')
sm.graphics.tsa.plot_pacf(consumption_train.consumption_season.dropna(), lags=40, alpha=0.05, ax=ax[1], title='PACF of Consumption')
plt.show()
Plots for Imports, Exports, Investments and Government Expenditures
fig, ax = plt.subplots(4,2,figsize=(19,14))
sm.graphics.tsa.plot_acf(imports_train.imports_season.dropna(), lags=40, alpha = 0.05, ax=ax[0,0], title='ACF of Imports')
sm.graphics.tsa.plot_pacf(imports_train.imports_season.dropna(), lags=40, alpha=0.05, ax=ax[0,1], title='PACF of Imports')
sm.graphics.tsa.plot_acf(exports_train.exports_season.dropna(), lags=40, alpha = 0.05, ax=ax[1,0], title='ACF of Exports')
sm.graphics.tsa.plot_pacf(exports_train.exports_season.dropna(), lags=40, alpha=0.05, ax=ax[1,1], title='PACF of Exports')
sm.graphics.tsa.plot_acf(investments_train.investments_season.dropna(), lags=40, alpha = 0.05, ax=ax[2,0], title='ACF of Investments')
sm.graphics.tsa.plot_pacf(investments_train.investments_season.dropna(), lags=40, alpha=0.05, ax=ax[2,1], title='PACF of Investments')
sm.graphics.tsa.plot_acf(government_expenditure_train.government_expenditure_season.dropna(), lags=40, alpha = 0.05, ax=ax[3,0], title='ACF of Government Expenditure')
sm.graphics.tsa.plot_pacf(government_expenditure_train.government_expenditure_season.dropna(), lags=40, alpha=0.05, ax=ax[3,1], title='PACF of Government Expenditure')
plt.show()
from pmdarima import auto_arima
from pmdarima import ARIMA
# Fit one SARIMA per series with pmdarima's stepwise search. All series
# share the same search space -- up to (3,1,3)x(3,1,3,4), AIC-selected,
# no intercept -- so the keyword arguments are defined once and reused.
search_kwargs = dict(start_p=0, d=1, start_q=0, max_p=3, max_q=3,
                     start_P=0, D=1, start_Q=0, max_P=3, max_Q=3,
                     m=4, seasonal=True, with_intercept=False,
                     information_criterion='aic')
imports_best = auto_arima(imports_train.imports.dropna(), **search_kwargs)
exports_best = auto_arima(exports_train.exports.dropna(), **search_kwargs)
investments_best = auto_arima(investments_train.investments.dropna(), **search_kwargs)
government_expenditure_best = auto_arima(government_expenditure_train.government_expenditure.dropna(), **search_kwargs)
consumption_best = auto_arima(consumption_train.consumption.dropna(), **search_kwargs)
# Selected model orders
print('Consumption')
print(consumption_best)
print('Imports')
print(imports_best)
print('Exports')
print(exports_best)
print('Investments')
print(investments_best)
print('government_expenditure')
print(government_expenditure_best)
# Full estimation summaries
print('Consumption')
print(consumption_best.summary())
print('Imports')
print(imports_best.summary())
print('Exports')
print(exports_best.summary())
print('Investments')
print(investments_best.summary())
print('government_expenditure')
print(government_expenditure_best.summary())
import statsmodels.api as sm
np.set_printoptions(suppress=True) #Avoiding scientific notation
Roots for Consumption
# Inverse AR/MA roots of the fitted consumption model; inverse roots must
# lie inside the unit circle for invertibility/stationarity.
MA_consumption=consumption_best.maroots()
inverse_MA_consumption=1/MA_consumption
AR_consumption=consumption_best.arroots()
inverse_AR_consumption=1/AR_consumption
print(f'MA roots'), print(pd.DataFrame(inverse_MA_consumption))
print(f'AR roots'), print(pd.DataFrame(inverse_AR_consumption))
%pylab inline
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse MA roots')
axes().set_aspect('equal')
# Scatter the inverse MA roots in the complex plane
x = [ele.real for ele in inverse_MA_consumption]
y = [ele.imag for ele in inverse_MA_consumption]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
Roots for Imports
# Same inverse-root inspection for the remaining models: print inverse
# AR/MA roots, then scatter them against the unit circle.
MA_imports=imports_best.maroots()
inverse_MA_imports=1/MA_imports
AR_imports=imports_best.arroots()
inverse_AR_imports=1/AR_imports
print(f'MA roots'), print(pd.DataFrame(inverse_MA_imports))
print(f'AR roots'), print(pd.DataFrame(inverse_AR_imports))
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse AR roots')
axes().set_aspect('equal')
x = [ele.real for ele in inverse_AR_imports]
y = [ele.imag for ele in inverse_AR_imports]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
Roots for Exports
MA_exports=exports_best.maroots()
inverse_MA_exports=1/MA_exports
AR_exports=exports_best.arroots()
inverse_AR_exports=1/AR_exports
print(f'MA roots'), print(pd.DataFrame(inverse_MA_exports))
print(f'AR roots'), print(pd.DataFrame(inverse_AR_exports))
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse MA roots')
axes().set_aspect('equal')
x = [ele.real for ele in inverse_MA_exports]
y = [ele.imag for ele in inverse_MA_exports]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse AR roots')
axes().set_aspect('equal')
x = [ele.real for ele in inverse_AR_exports]
y = [ele.imag for ele in inverse_AR_exports]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
Roots for Investments
MA_investments=investments_best.maroots()
inverse_MA_investments=1/MA_investments
AR_investments=investments_best.arroots()
inverse_AR_investments=1/AR_investments
print(f'MA roots'), print(pd.DataFrame(inverse_MA_investments))
print(f'AR roots'), print(pd.DataFrame(inverse_AR_investments))
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse MA roots')
axes().set_aspect('equal')
x = [ele.real for ele in inverse_MA_investments]
y = [ele.imag for ele in inverse_MA_investments]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
Roots for Government Expenditure
MA_government_expenditure=government_expenditure_best.maroots()
inverse_MA_government_expenditure=1/MA_government_expenditure
AR_government_expenditure=government_expenditure_best.arroots()
inverse_AR_government_expenditure=1/AR_government_expenditure
print(f'MA roots'), print(pd.DataFrame(inverse_MA_government_expenditure))
print(f'AR roots'), print(pd.DataFrame(inverse_AR_government_expenditure))
#plot unit circle
t = linspace(0,2*pi,101)
plot(cos(t),sin(t))
plot([0,0],[-1.1,1.1],'k',linewidth=0.5)
plot([-1.1,1.1],[0,0],'k',linewidth=0.5)
plt.title('Inverse MA roots')
axes().set_aspect('equal')
x = [ele.real for ele in inverse_MA_government_expenditure]
y = [ele.imag for ele in inverse_MA_government_expenditure]
plt.scatter(x, y, color='red')
plt.ylabel('Imaginary')
plt.xlabel('Real')
# Figure defaults for the residual-diagnostics plots.
plt.rcParams["figure.figsize"] = (20, 3)
# %matplotlib inline  # IPython magic: valid only inside a notebook, not a .py file
plt.style.use('fivethirtyeight')
# seaborn is not imported (its import line was commented out), so sns.set(...)
# raised NameError; set the figure size with matplotlib directly instead.
plt.rcParams['figure.figsize'] = (11, 4)

# Residual diagnostics per component series (the bare "Residuals for ..." lines
# were markdown cells: SyntaxError in a .py file).
consumption_best.plot_diagnostics(figsize=(15, 12))
imports_best.plot_diagnostics(figsize=(15, 12))
exports_best.plot_diagnostics(figsize=(15, 12))
investments_best.plot_diagnostics(figsize=(15, 12))
government_expenditure_best.plot_diagnostics(figsize=(15, 12))

from statsmodels.stats.diagnostic import acorr_ljungbox

# Ljung-Box test on model residuals, 8 lags.
# H0 = no serial correlation; a small p-value means correlation is present.
LB_consumption = acorr_ljungbox(consumption_best.resid(), boxpierce=False, lags=8, return_df=True)
LB_imports = acorr_ljungbox(imports_best.resid(), boxpierce=False, lags=8, return_df=True)
LB_exports = acorr_ljungbox(exports_best.resid(), boxpierce=False, lags=8, return_df=True)
LB_investments = acorr_ljungbox(investments_best.resid(), boxpierce=False, lags=8, return_df=True)
LB_government_expenditure = acorr_ljungbox(government_expenditure_best.resid(), boxpierce=False, lags=8, return_df=True)
print('Consumption', LB_consumption)
print('Imports', LB_imports)
print('Exports', LB_exports)
print('Investments', LB_investments)
print('Government Expenditures', LB_government_expenditure)
def _fit_and_plot_arima(train_df, full_df, col, model, title):
    """Attach in-sample ARIMA fitted values to *train_df*, append a 6-quarter
    out-of-sample forecast, plot model vs. actual, and return the combined
    fitted+forecast series.

    Side effects (mirroring the original notebook cells): the first 5 fitted
    values (model burn-in) in *train_df* and the first 5 actuals in *full_df*
    are blanked to NaN before plotting.
    """
    train_df['arima_model'] = model.arima_res_.fittedvalues
    # Position-based writes avoid pandas chained-assignment warnings;
    # np.nan replaces np.NaN, which was removed in NumPy 2.0.
    train_df.iloc[:5, train_df.columns.get_loc('arima_model')] = np.nan
    full_df.iloc[:5, full_df.columns.get_loc(col)] = np.nan
    fc = pd.DataFrame(model.predict(n_periods=6))
    fc['Time'] = pd.period_range('2019-07', periods=6, freq='Q')
    fc = fc.set_index('Time')
    fc.index = fc.index.to_timestamp()
    fc = fc.squeeze()  # one-column DataFrame -> Series
    # Series.append was removed in pandas 2.0; pd.concat is the supported form.
    combined = pd.concat([train_df['arima_model'], fc])
    plt.figure(figsize=(15, 7.5))
    plt.plot(combined, color='r', label='model')
    plt.axvspan(train_df.index[-1], combined.index[-1], alpha=0.5, color='lightgrey')
    plt.plot(full_df[col], label='actual')
    plt.title(title)
    plt.legend()
    plt.show()
    return combined


# One combined fitted+forecast series per GDP component (the bare
# "for Consumption" etc. header lines were markdown cells: SyntaxError in a .py file).
forecast_consumption = _fit_and_plot_arima(consumption_train, consumption, 'consumption', consumption_best, 'Consumption')
forecast_imports = _fit_and_plot_arima(imports_train, imports, 'imports', imports_best, 'imports')
forecast_exports = _fit_and_plot_arima(exports_train, exports, 'exports', exports_best, 'exports')
forecast_investments = _fit_and_plot_arima(investments_train, investments, 'investments', investments_best, 'investments')
forecast_government_expenditure = _fit_and_plot_arima(government_expenditure_train, government_expenditure, 'government_expenditure', government_expenditure_best, 'government_expenditure')
from sklearn.metrics import mean_squared_error
import math

# Per-component ARIMA RMSEs (the bare "RMSE for ..." lines were markdown
# cells: SyntaxError in a .py file). Train RMSE skips the first fitted value
# (model initialization); test RMSE compares the 6 forecast quarters
# (rows 118+) against the held-out test set.

# RMSE for Consumption
true_train_consumption = consumption_train.consumption.iloc[1:]
predicted_train_consumption = pd.DataFrame(consumption_best.arima_res_.fittedvalues).iloc[1:]
rmse_train_consumption = math.sqrt(mean_squared_error(true_train_consumption, predicted_train_consumption))
forecast_values_consumption_ARIMA = forecast_consumption.iloc[118:]
true_values_consumption_ARIMA = consumption_test.consumption
rmse_test_consumption = math.sqrt(mean_squared_error(true_values_consumption_ARIMA, forecast_values_consumption_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_consumption)
print('Test Score: %.2f RMSE' % rmse_test_consumption)

# RMSE for Imports
true_train_imports = imports_train.imports.iloc[1:]
predicted_train_imports = pd.DataFrame(imports_best.arima_res_.fittedvalues).iloc[1:]
rmse_train_imports = math.sqrt(mean_squared_error(true_train_imports, predicted_train_imports))
forecast_values_imports_ARIMA = forecast_imports.iloc[118:]
true_values_imports_ARIMA = imports_test.imports
rmse_test_imports = math.sqrt(mean_squared_error(true_values_imports_ARIMA, forecast_values_imports_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_imports)
print('Test Score: %.2f RMSE' % rmse_test_imports)

# RMSE for Exports
true_train_exports = exports_train.exports.iloc[1:]
predicted_train_exports = pd.DataFrame(exports_best.arima_res_.fittedvalues).iloc[1:]
rmse_train_exports = math.sqrt(mean_squared_error(true_train_exports, predicted_train_exports))
forecast_values_exports_ARIMA = forecast_exports.iloc[118:]
true_values_exports_ARIMA = exports_test.exports
rmse_test_exports = math.sqrt(mean_squared_error(true_values_exports_ARIMA, forecast_values_exports_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_exports)
print('Test Score: %.2f RMSE' % rmse_test_exports)

# RMSE for Investments
true_train_investments = investments_train.investments.iloc[1:]
predicted_train_investments = pd.DataFrame(investments_best.arima_res_.fittedvalues).iloc[1:]
rmse_train_investments = math.sqrt(mean_squared_error(true_train_investments, predicted_train_investments))
forecast_values_investments_ARIMA = forecast_investments.iloc[118:]
true_values_investments_ARIMA = investments_test.investments
rmse_test_investments = math.sqrt(mean_squared_error(true_values_investments_ARIMA, forecast_values_investments_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_investments)
print('Test Score: %.2f RMSE' % rmse_test_investments)

# RMSE for Government Expenditures
true_train_government_expenditure = government_expenditure_train.government_expenditure.iloc[1:]
predicted_train_government_expenditure = pd.DataFrame(government_expenditure_best.arima_res_.fittedvalues).iloc[1:]
rmse_train_government_expenditure = math.sqrt(mean_squared_error(true_train_government_expenditure, predicted_train_government_expenditure))
forecast_values_government_expenditure_ARIMA = forecast_government_expenditure.iloc[118:]
true_values_government_expenditure_ARIMA = government_expenditure_test.government_expenditure
rmse_test_government_expenditure = math.sqrt(mean_squared_error(true_values_government_expenditure_ARIMA, forecast_values_government_expenditure_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_government_expenditure)
print('Test Score: %.2f RMSE' % rmse_test_government_expenditure)
# Convert the combined fitted+forecast series to single-column frames for joining.
forecast_consumption = forecast_consumption.to_frame('consumption')
forecast_imports = forecast_imports.to_frame('imports')
forecast_exports = forecast_exports.to_frame('exports')
forecast_investments = forecast_investments.to_frame('investments')
forecast_government_expenditure = forecast_government_expenditure.to_frame('government_expenditure')
# Merge all five FORECAST frames on the shared time index.
# BUG FIX: the original merged the raw exports/investments/government_expenditure
# data frames instead of their forecasts, so 'gdp_forecast' mixed actual values
# into the sum. Index-based merges replace the fragile on='Time' (index, not column).
data = pd.merge(forecast_consumption, forecast_imports, left_index=True, right_index=True)
data = pd.merge(data, forecast_exports, left_index=True, right_index=True)
data = pd.merge(data, forecast_investments, left_index=True, right_index=True)
data = pd.merge(data, forecast_government_expenditure, left_index=True, right_index=True)
# GDP expenditure identity: Y = C - M + X + I + G (vectorized; no row-wise apply).
data['gdp_forecast'] = (data['consumption'] - data['imports'] + data['exports']
                        + data['investments'] + data['government_expenditure'])
data['gdp'] = gdp.gdp
plt.figure(figsize=(15, 7.5))
plt.plot(data['gdp_forecast'], color='r', label='prediction')
plt.axvspan(government_expenditure_train.index[-1], forecast_government_expenditure.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data['gdp'], label='actual')
plt.title('GDP')
plt.legend()
plt.show()
# RMSE of the aggregated forecast: rows 5-117 are the train span
# (first 5 rows were blanked to NaN earlier), rows 118+ the 6 test quarters.
true_train_gdp = data.gdp.iloc[5:118]
predicted_train_gdp = data.gdp_forecast.iloc[5:118]
rmse_train_gdp = math.sqrt(mean_squared_error(true_train_gdp, predicted_train_gdp))
forecast_values_gdp_ARIMA = data.gdp_forecast.iloc[118:]
true_values_gdp_ARIMA = data.gdp.iloc[118:]
rmse_test_gdp = math.sqrt(mean_squared_error(true_values_gdp_ARIMA, forecast_values_gdp_ARIMA))
print('Train Score: %.2f RMSE' % rmse_train_gdp)
print('Test Score: %.2f RMSE' % rmse_test_gdp)
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def _load_series(url, col):
    """Load one quarterly series (1990Q1-2020Q4, 124 rows) from GitHub into a
    DataFrame indexed by quarter-start timestamps with a single numeric
    column *col*. Replaces six copy-pasted loader cells."""
    df = pd.read_csv(url, sep=';', header=0, index_col=0, parse_dates=[0],
                     names=['Time', col])
    # The CSV's own date column is unreliable; rebuild a clean quarterly index.
    df['Time'] = pd.period_range('1990-01', periods=124, freq='Q')
    df[col] = pd.to_numeric(df[col])
    df = df.set_index('Time')
    df.index = df.index.to_timestamp()
    return df


_BASE = 'https://raw.githubusercontent.com/Louise198713/Speciale/main/'
consumption = _load_series(_BASE + 'Privatforbrug%201990K1-2020K4.csv', 'consumption')
exports = _load_series(_BASE + 'Eksport%201990K1-2020K4.csv', 'exports')
imports = _load_series(_BASE + 'Import%201990K1-2020K4.csv', 'imports')
investments = _load_series(_BASE + 'Investment1990-2020.csv', 'investments')
government_expenditure = _load_series(_BASE + 'Public%20consumption1990-2020.csv', 'government_expenditure')
gdp = _load_series(_BASE + 'GDP1990-2020.csv', 'gdp')
# Standardise the modelling target column to 'y' in every component frame so
# the LSTM code below can treat them uniformly. Each frame has exactly one
# column here, so relabelling the axis is equivalent to a rename.
consumption = consumption.set_axis(['y'], axis=1)
imports = imports.set_axis(['y'], axis=1)
exports = exports.set_axis(['y'], axis=1)
investments = investments.set_axis(['y'], axis=1)
government_expenditure = government_expenditure.set_axis(['y'], axis=1)
# ---- LSTM model for Consumption ----
data = consumption
# Target: next-quarter value (shift -1; last row repeats the final value).
data['y+1_unscaled'] = data.y.shift(-1, fill_value=data.y.iloc[-1])
# Scale inputs to [0, 1] for the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
data['y_scaled'] = scaler.fit_transform(data['y'].values.reshape(-1, 1))
# Scaled next-quarter target.
data['y+1'] = data.y_scaled.shift(-1, fill_value=data.y_scaled.iloc[-1])
# Matrix of the last two columns only: [y_scaled, y+1].
data_p = data.iloc[:, 2:].values.astype('float32')
# Train/test split: 118 quarters train, 6 quarters test.
train, test = data_p[:118], data_p[118:]
print(len(train), len(test))
X_train = train[:, 0]
y_train = train[:, 1]  # consumption at t+1 for the training set
X_test = test[:, 0]
y_test = test[:, 1]
# Reshape input to [samples, time steps, features]; was numpy.reshape, a
# NameError since only `np` is imported at the top of the file.
X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))
# Three stacked 64-unit LSTM layers with light dropout, single-value output.
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # 1 timestep, 1 feature
model.add(Dropout(0.1))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False preserves the temporal order of the training samples.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, verbose=0, shuffle=False)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
# Invert the [0, 1] scaling on predictions and targets.
trainPredict = scaler.inverse_transform(trainPredict)
y_train = scaler.inverse_transform([y_train])
testPredict = scaler.inverse_transform(testPredict)
y_test = scaler.inverse_transform([y_test])
# Column to hold predictions; 1-D zeros (np.zeros((124, 1)) is not a valid
# column) and position-based writes (no chained-assignment warnings).
data.insert(2, "y_pred", np.zeros(124), False)
data.iloc[-6:, data.columns.get_loc('y_pred')] = testPredict.flatten()
data.iloc[:118, data.columns.get_loc('y_pred')] = trainPredict.flatten()
# The predictions are one period ahead: shift the index by one quarter.
data_new = data[['y_pred', 'y+1_unscaled']].copy()  # .copy() avoids writing into a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.y_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['y+1_unscaled'], label='actual')
plt.title('Consumption')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()
predicted_train_consumption_LSTM = data_new.y_pred[:117]
actual_train_consumption_LSTM = data_new['y+1_unscaled'][:117]
forecasted_test_consumption_LSTM = data_new.y_pred.iloc[-6:]
actual_test_consumption_LSTM = data_new['y+1_unscaled'].iloc[-6:]
# RMSEs
trainScore = math.sqrt(mean_squared_error(actual_train_consumption_LSTM, predicted_train_consumption_LSTM))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(actual_test_consumption_LSTM, forecasted_test_consumption_LSTM))
print('Test Score: %.2f RMSE' % testScore)
# ---- LSTM model for Imports ----
data = imports
# Target: next-quarter value (shift -1; last row repeats the final value).
data['y+1_unscaled'] = data.y.shift(-1, fill_value=data.y.iloc[-1])
# Scale inputs to [0, 1] for the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
data['y_scaled'] = scaler.fit_transform(data.y.values.reshape(-1, 1))
# Scaled next-quarter target.
data['y+1'] = data.y_scaled.shift(-1, fill_value=data.y_scaled.iloc[-1])
# Matrix of the last two columns only: [y_scaled, y+1].
data_p = data.iloc[:, 2:].values.astype('float32')
# Train/test split: 118 quarters train, 6 quarters test.
train, test = data_p[:118], data_p[118:]
print(len(train), len(test))
X_train = train[:, 0]
y_train = train[:, 1]
X_test = test[:, 0]
y_test = test[:, 1]
# Reshape input to [samples, time steps, features]; was numpy.reshape, a
# NameError since only `np` is imported at the top of the file.
X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))
# Three stacked 64-unit LSTM layers with light dropout, single-value output.
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # 1 timestep, 1 feature
model.add(Dropout(0.1))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False preserves the temporal order of the training samples.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, verbose=0, shuffle=False)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
# Invert the [0, 1] scaling on predictions and targets.
trainPredict = scaler.inverse_transform(trainPredict)
y_train = scaler.inverse_transform([y_train])
testPredict = scaler.inverse_transform(testPredict)
y_test = scaler.inverse_transform([y_test])
# Column to hold predictions (comment said "Consumption_pred": copy-paste
# leftover); 1-D zeros and position-based writes.
data.insert(2, "y_pred", np.zeros(124), False)
data.iloc[-6:, data.columns.get_loc('y_pred')] = testPredict.flatten()
data.iloc[:118, data.columns.get_loc('y_pred')] = trainPredict.flatten()
# The predictions are one period ahead: shift the index by one quarter.
data_new = data[['y_pred', 'y+1_unscaled']].copy()  # .copy() avoids writing into a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.y_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['y+1_unscaled'], label='actual')
plt.title('Imports')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()
predicted_train_imports_LSTM = data_new.y_pred[:117]
actual_train_imports_LSTM = data_new['y+1_unscaled'][:117]
forecasted_test_imports_LSTM = data_new.y_pred.iloc[-6:]
actual_test_imports_LSTM = data_new['y+1_unscaled'].iloc[-6:]
# RMSEs
trainScore = math.sqrt(mean_squared_error(actual_train_imports_LSTM, predicted_train_imports_LSTM))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(actual_test_imports_LSTM, forecasted_test_imports_LSTM))
print('Test Score: %.2f RMSE' % testScore)
# ---- LSTM model for Exports ----
data = exports
# Target: next-quarter value (shift -1; last row repeats the final value).
data['y+1_unscaled'] = data.y.shift(-1, fill_value=data.y.iloc[-1])
# Scale inputs to [0, 1] for the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
data['y_scaled'] = scaler.fit_transform(data.y.values.reshape(-1, 1))
# Scaled next-quarter target.
data['y+1'] = data.y_scaled.shift(-1, fill_value=data.y_scaled.iloc[-1])
# Matrix of the last two columns only: [y_scaled, y+1].
data_p = data.iloc[:, 2:].values.astype('float32')
# Train/test split: 118 quarters train, 6 quarters test.
train, test = data_p[:118], data_p[118:]
print(len(train), len(test))
X_train = train[:, 0]
y_train = train[:, 1]
X_test = test[:, 0]
y_test = test[:, 1]
# Reshape input to [samples, time steps, features]; was numpy.reshape, a
# NameError since only `np` is imported at the top of the file.
X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))
# Three stacked 64-unit LSTM layers with light dropout, single-value output.
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # 1 timestep, 1 feature
model.add(Dropout(0.1))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False preserves the temporal order of the training samples.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, verbose=0, shuffle=False)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
# Invert the [0, 1] scaling on predictions and targets.
trainPredict = scaler.inverse_transform(trainPredict)
y_train = scaler.inverse_transform([y_train])
testPredict = scaler.inverse_transform(testPredict)
y_test = scaler.inverse_transform([y_test])
# Column to hold predictions; 1-D zeros and position-based writes.
data.insert(2, "y_pred", np.zeros(124), False)
data.iloc[-6:, data.columns.get_loc('y_pred')] = testPredict.flatten()
data.iloc[:118, data.columns.get_loc('y_pred')] = trainPredict.flatten()
# The predictions are one period ahead: shift the index by one quarter.
data_new = data[['y_pred', 'y+1_unscaled']].copy()  # .copy() avoids writing into a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.y_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['y+1_unscaled'], label='actual')
plt.title('Exports')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()
predicted_train_exports_LSTM = data_new.y_pred[:117]
actual_train_exports_LSTM = data_new['y+1_unscaled'][:117]
forecasted_test_exports_LSTM = data_new.y_pred.iloc[-6:]
actual_test_exports_LSTM = data_new['y+1_unscaled'].iloc[-6:]
# RMSEs
trainScore = math.sqrt(mean_squared_error(actual_train_exports_LSTM, predicted_train_exports_LSTM))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(actual_test_exports_LSTM, forecasted_test_exports_LSTM))
print('Test Score: %.2f RMSE' % testScore)
# ---- LSTM model for Investments ----
data = investments
# Target: next-quarter value (shift -1; last row repeats the final value).
data['y+1_unscaled'] = data.y.shift(-1, fill_value=data.y.iloc[-1])
# Scale inputs to [0, 1] for the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
data['y_scaled'] = scaler.fit_transform(data.y.values.reshape(-1, 1))
# Scaled next-quarter target.
data['y+1'] = data.y_scaled.shift(-1, fill_value=data.y_scaled.iloc[-1])
# Matrix of the last two columns only: [y_scaled, y+1].
data_p = data.iloc[:, 2:].values.astype('float32')
# Train/test split: 118 quarters train, 6 quarters test.
train, test = data_p[:118], data_p[118:]
print(len(train), len(test))
X_train = train[:, 0]
y_train = train[:, 1]
X_test = test[:, 0]
y_test = test[:, 1]
# Reshape input to [samples, time steps, features]; was numpy.reshape, a
# NameError since only `np` is imported at the top of the file.
X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))
# Three stacked 64-unit LSTM layers with light dropout, single-value output.
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # 1 timestep, 1 feature
model.add(Dropout(0.1))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False preserves the temporal order of the training samples.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, verbose=0, shuffle=False)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
# Invert the [0, 1] scaling on predictions and targets.
trainPredict = scaler.inverse_transform(trainPredict)
y_train = scaler.inverse_transform([y_train])
testPredict = scaler.inverse_transform(testPredict)
y_test = scaler.inverse_transform([y_test])
# Column to hold predictions; 1-D zeros and position-based writes.
data.insert(2, "y_pred", np.zeros(124), False)
data.iloc[-6:, data.columns.get_loc('y_pred')] = testPredict.flatten()
data.iloc[:118, data.columns.get_loc('y_pred')] = trainPredict.flatten()
# The predictions are one period ahead: shift the index by one quarter.
data_new = data[['y_pred', 'y+1_unscaled']].copy()  # .copy() avoids writing into a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.y_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['y+1_unscaled'], label='actual')
plt.title('Investments')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()
predicted_train_investments_LSTM = data_new.y_pred[:117]
actual_train_investments_LSTM = data_new['y+1_unscaled'][:117]
forecasted_test_investments_LSTM = data_new.y_pred.iloc[-6:]
actual_test_investments_LSTM = data_new['y+1_unscaled'].iloc[-6:]
# RMSEs
trainScore = math.sqrt(mean_squared_error(actual_train_investments_LSTM, predicted_train_investments_LSTM))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(actual_test_investments_LSTM, forecasted_test_investments_LSTM))
print('Test Score: %.2f RMSE' % testScore)
# ---- LSTM model for Government Expenditures ----
data = government_expenditure
# Target: next-quarter value (shift -1; last row repeats the final value).
data['y+1_unscaled'] = data.y.shift(-1, fill_value=data.y.iloc[-1])
# Scale inputs to [0, 1] for the LSTM.
scaler = MinMaxScaler(feature_range=(0, 1))
data['y_scaled'] = scaler.fit_transform(data.y.values.reshape(-1, 1))
# Scaled next-quarter target.
data['y+1'] = data.y_scaled.shift(-1, fill_value=data.y_scaled.iloc[-1])
# Matrix of the last two columns only: [y_scaled, y+1].
data_p = data.iloc[:, 2:].values.astype('float32')
# Train/test split: 118 quarters train, 6 quarters test.
train, test = data_p[:118], data_p[118:]
print(len(train), len(test))
X_train = train[:, 0]
y_train = train[:, 1]
X_test = test[:, 0]
y_test = test[:, 1]
# Reshape input to [samples, time steps, features]; was numpy.reshape, a
# NameError since only `np` is imported at the top of the file.
X_train = np.reshape(X_train, (X_train.shape[0], 1, 1))
X_test = np.reshape(X_test, (X_test.shape[0], 1, 1))
# Three stacked 64-unit LSTM layers with light dropout, single-value output.
model = Sequential()
model.add(LSTM(units=64, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))  # 1 timestep, 1 feature
model.add(Dropout(0.1))
model.add(LSTM(64, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(64))
model.add(Dropout(0.1))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# shuffle=False preserves the temporal order of the training samples.
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_split=0.1, verbose=0, shuffle=False)
trainPredict = model.predict(X_train)
testPredict = model.predict(X_test)
# Invert the [0, 1] scaling on predictions and targets.
trainPredict = scaler.inverse_transform(trainPredict)
y_train = scaler.inverse_transform([y_train])
testPredict = scaler.inverse_transform(testPredict)
y_test = scaler.inverse_transform([y_test])
# Column to hold predictions; 1-D zeros and position-based writes.
data.insert(2, "y_pred", np.zeros(124), False)
data.iloc[-6:, data.columns.get_loc('y_pred')] = testPredict.flatten()
data.iloc[:118, data.columns.get_loc('y_pred')] = trainPredict.flatten()
# The predictions are one period ahead: shift the index by one quarter.
data_new = data[['y_pred', 'y+1_unscaled']].copy()  # .copy() avoids writing into a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.y_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['y+1_unscaled'], label='actual')
plt.title('Government Expenditures')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()
predicted_train_gov_LSTM = data_new.y_pred[:117]
actual_train_gov_LSTM = data_new['y+1_unscaled'][:117]
forecasted_test_gov_LSTM = data_new.y_pred.iloc[-6:]
actual_test_gov_LSTM = data_new['y+1_unscaled'].iloc[-6:]
# RMSEs
trainScore = math.sqrt(mean_squared_error(actual_train_gov_LSTM, predicted_train_gov_LSTM))
print('Train Score: %.2f RMSE' % trainScore)
testScore = math.sqrt(mean_squared_error(actual_test_gov_LSTM, forecasted_test_gov_LSTM))
print('Test Score: %.2f RMSE' % testScore)
def _load_quarterly_series(url, name):
    """Load one quarterly series (1990Q1-2020Q4, 124 obs) from GitHub.

    Reads a semicolon-separated CSV with a date column and one value column,
    replaces the raw date column with a quarterly PeriodIndex converted to
    timestamps, and coerces the value column to numeric.

    Parameters
    ----------
    url : str -- raw-GitHub URL of the CSV file.
    name : str -- column name to give the value series.

    Returns
    -------
    pandas.DataFrame with a DatetimeIndex named 'Time' and one column `name`.
    """
    series = pd.read_csv(url,
                         sep=';',
                         header=0,
                         index_col=0,
                         parse_dates=[0],
                         names=['Time', name])
    # Replace the parsed dates with a clean quarterly range (handles the
    # Danish "1990K1"-style labels uniformly).
    series['Time'] = pd.period_range('1990-01', periods=124, freq='Q')
    series[name] = pd.to_numeric(series[name])
    series = series.set_index('Time')
    series.index = series.index.to_timestamp()
    return series


_REPO = 'https://raw.githubusercontent.com/Louise198713/Speciale/main/'
consumption = _load_quarterly_series(_REPO + 'Privatforbrug%201990K1-2020K4.csv', 'consumption')
exports = _load_quarterly_series(_REPO + 'Eksport%201990K1-2020K4.csv', 'exports')
imports = _load_quarterly_series(_REPO + 'Import%201990K1-2020K4.csv', 'imports')
investments = _load_quarterly_series(_REPO + 'Investment1990-2020.csv', 'investments')
government_expenditure = _load_quarterly_series(_REPO + 'Public%20consumption1990-2020.csv', 'government_expenditure')
gdp = _load_quarterly_series(_REPO + 'GDP1990-2020.csv', 'gdp')

# Merge all six series into one DataFrame on the common quarterly index.
data = pd.merge(gdp, consumption, on='Time')
data = pd.merge(data, imports, on='Time')
data = pd.merge(data, exports, on='Time')
data = pd.merge(data, investments, on='Time')
data = pd.merge(data, government_expenditure, on='Time')
data
# Create the target: next quarter's GDP (the final row keeps the last
# observed value instead of NaN, matching the original fill).
data['gdp_y'] = data['gdp'].shift(-1, fill_value=data['gdp'].iloc[-1])

# Split into train (first 118 quarters) and test (last 6 quarters).
train_df, test_df = data[:118], data[118:]

# Define X (all six series) and y (next-quarter GDP, the last column).
X_train = train_df.iloc[:, :-1].values
y_train = train_df.iloc[:, -1].values
X_test = test_df.iloc[:, :-1].values
y_test = test_df.iloc[:, -1].values
data

# Scale features and target separately to [-1, 1]; scalers are fitted on the
# training data only so no test-set information leaks into the transform.
x_scaler = MinMaxScaler(feature_range=(-1, 1))
y_scaler = MinMaxScaler(feature_range=(-1, 1))
X_train = x_scaler.fit_transform(X_train)
y_train = y_scaler.fit_transform(y_train.reshape(-1, 1))
X_test = x_scaler.transform(X_test)
y_test = y_scaler.transform(y_test.reshape(-1, 1))

# Reshape input to [samples, time steps, features]; sizes are derived from the
# arrays instead of hard-coding (118, 1, 6) / (6, 1, 6). (np, not `numpy`:
# the file imports numpy as np.)
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
# ---- LSTM model for GDP (multivariate: all six series as features) ----
model = Sequential()
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

# Fit network; shuffle=False keeps the quarters in chronological order.
history = model.fit(X_train, y_train, epochs=40, batch_size=32, validation_split=0.1, verbose=0, shuffle=False)

# Predictions for the training and the test period.
trainPredict = model.predict(X_train)  # predict y from X
testPredict = model.predict(X_test)

# Invert the MinMax scaling back to billions of DKK.
trainPredict = y_scaler.inverse_transform(trainPredict)
y_train = y_scaler.inverse_transform(y_train)
testPredict = y_scaler.inverse_transform(testPredict)
y_test = y_scaler.inverse_transform(y_test)

# Collect all predicted values (train + test) in one column "gdp_pred".
# Positional iloc on the frame itself avoids chained-assignment pitfalls.
data.insert(6, "gdp_pred", np.zeros(124), False)
_gdp_pred_loc = data.columns.get_loc("gdp_pred")
data.iloc[-6:, _gdp_pred_loc] = testPredict.flatten()    # last 6 rows: test predictions
data.iloc[:118, _gdp_pred_loc] = trainPredict.flatten()  # first 118 rows: train fit
data

# Predictions are one period ahead: shift the time index one quarter.
data_new = data[['gdp_pred', 'gdp_y']].copy()  # .copy(): mutate a frame, not a view
data_new['Time'] = pd.period_range('1990-04', periods=124, freq='Q')
data_new = data_new.set_index('Time')
data_new.index = data_new.index.to_timestamp()
data_new = data_new.iloc[:-1]
data_new

# Plot fit/forecast vs actual; grey span marks the 6-quarter test window.
plt.figure(figsize=(15, 7.5))
plt.plot(data_new.gdp_pred, color='r', label='model')
plt.axvspan(data_new.index[-7], data_new.index[-1], alpha=0.5, color='lightgrey')
plt.plot(data_new['gdp_y'], label='actual')
plt.title('GDP')
plt.ylabel('Bill. DKK')
plt.xlabel('Time')
plt.legend()
plt.show()

# Fitted values / forecasts used for the RMSEs and the later DM tests.
predicted_train_gdp_LSTM = data_new.gdp_pred[:117]
actual_train_gdp_LSTM = data_new['gdp_y'][:117]
forecasted_test_gdp_LSTM = data_new.gdp_pred.iloc[-6:]
actual_test_gdp_LSTM = data_new['gdp_y'].iloc[-6:]

# RMSEs (sqrt comes from `from math import sqrt` at the top of the file).
trainScore = sqrt(mean_squared_error(actual_train_gdp_LSTM, predicted_train_gdp_LSTM))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = sqrt(mean_squared_error(actual_test_gdp_LSTM, forecasted_test_gdp_LSTM))
print('Test Score: %.2f RMSE' % (testScore))
def diebold_mariano_test(actual_lst, pred1_lst, pred2_lst, h=1, crit="MSE", power=2):
    """Diebold-Mariano test for equal predictive accuracy of two forecasts.

    Builds the loss differential d_t = L(e1_t) - L(e2_t) between the errors of
    the two forecasts, estimates the long-run variance of its mean from the
    first ``h`` autocovariances, and applies the Harvey-Leybourne-Newbold
    small-sample adjustment to the DM statistic.

    Parameters
    ----------
    actual_lst : sequence of numbers -- realised values.
    pred1_lst, pred2_lst : sequences of numbers -- the two competing forecasts.
    h : int -- forecast horizon (steps ahead), 1 <= h < len(actual_lst).
    crit : str -- loss criterion: "MSE", "MAD", "MAPE" or "poly".
    power : number -- exponent used when ``crit == "poly"``.

    Returns
    -------
    named tuple ``dm_return(DM, p_value)`` -- the adjusted DM statistic and its
    two-sided p-value from a t distribution with T-1 degrees of freedom.

    Raises
    ------
    SyntaxError -- if the inputs fail validation (kept as SyntaxError for
    backward compatibility with the original implementation).
    """

    # Routine for checking errors; returns (code, message), code -1 on failure.
    def error_check():
        rt = 0
        msg = ""
        # Check if h is an integer
        if not isinstance(h, int):
            rt = -1
            msg = "The type of the number of steps ahead (h) is not an integer."
            return (rt, msg)
        # Check the range of h
        if h < 1:
            rt = -1
            msg = "The number of steps ahead (h) is not large enough."
            return (rt, msg)
        len_act = len(actual_lst)
        len_p1 = len(pred1_lst)
        len_p2 = len(pred2_lst)
        # Check if lengths of actual values and predicted values are equal
        if len_act != len_p1 or len_p1 != len_p2 or len_act != len_p2:
            rt = -1
            msg = "Lengths of actual_lst, pred1_lst and pred2_lst do not match."
            return (rt, msg)
        # Check range of h
        if h >= len_act:
            rt = -1
            msg = "The number of steps ahead is too large."
            return (rt, msg)
        # Check if criterion supported
        if crit != "MSE" and crit != "MAPE" and crit != "MAD" and crit != "poly":
            rt = -1
            msg = "The criterion is not supported."
            return (rt, msg)
        # Check that every element of the input lists is numeric.
        from re import compile as re_compile
        comp = re_compile(r"^\d+?\.\d+?$")  # raw string avoids invalid-escape warning

        def compiled_regex(s):
            """Return True if the string is a number."""
            if comp.match(s) is None:
                return s.isdigit()
            return True

        for actual, pred1, pred2 in zip(actual_lst, pred1_lst, pred2_lst):
            is_actual_ok = compiled_regex(str(abs(actual)))
            is_pred1_ok = compiled_regex(str(abs(pred1)))
            is_pred2_ok = compiled_regex(str(abs(pred2)))
            if not (is_actual_ok and is_pred1_ok and is_pred2_ok):
                msg = "An element in the actual_lst, pred1_lst or pred2_lst is not numeric."
                rt = -1
                return (rt, msg)
        return (rt, msg)

    # Validate inputs; raise if anything is off (unreachable `return` removed).
    error_code = error_check()
    if error_code[0] == -1:
        raise SyntaxError(error_code[1])

    # Imports kept local, as in the original, to avoid touching module scope.
    from scipy.stats import t
    import collections
    import pandas as pd
    import numpy as np

    # Convert every element to a plain float.
    actual_lst = pd.Series(actual_lst).apply(lambda x: float(x)).tolist()
    pred1_lst = pd.Series(pred1_lst).apply(lambda x: float(x)).tolist()
    pred2_lst = pd.Series(pred2_lst).apply(lambda x: float(x)).tolist()
    T = float(len(actual_lst))

    # Per-observation losses for each forecast, according to `crit`.
    e1_lst = []
    e2_lst = []
    if crit == "MSE":
        for actual, p1, p2 in zip(actual_lst, pred1_lst, pred2_lst):
            e1_lst.append((actual - p1) ** 2)
            e2_lst.append((actual - p2) ** 2)
    elif crit == "MAD":
        for actual, p1, p2 in zip(actual_lst, pred1_lst, pred2_lst):
            e1_lst.append(abs(actual - p1))
            e2_lst.append(abs(actual - p2))
    elif crit == "MAPE":
        for actual, p1, p2 in zip(actual_lst, pred1_lst, pred2_lst):
            e1_lst.append(abs((actual - p1) / actual))
            e2_lst.append(abs((actual - p2) / actual))
    elif crit == "poly":
        for actual, p1, p2 in zip(actual_lst, pred1_lst, pred2_lst):
            e1_lst.append((actual - p1) ** power)
            e2_lst.append((actual - p2) ** power)
    # Loss differential series (hoisted out of the branches: identical in all).
    d_lst = [e1 - e2 for e1, e2 in zip(e1_lst, e2_lst)]

    # Mean of d
    mean_d = pd.Series(d_lst).mean()

    # Autocovariance of Xi at lag k around Xs, normalised by N (biased estimator,
    # matching the original implementation).
    def autocovariance(Xi, N, k, Xs):
        autoCov = 0
        denom = float(N)
        for i in np.arange(0, N - k):
            autoCov += ((Xi[i + k]) - Xs) * (Xi[i] - Xs)
        return (1 / denom) * autoCov

    gamma = []
    for lag in range(0, h):
        gamma.append(autocovariance(d_lst, len(d_lst), lag, mean_d))

    # Long-run variance of mean(d) and the (unadjusted) DM statistic.
    V_d = (gamma[0] + 2 * sum(gamma[1:])) / T
    DM_stat = V_d ** (-0.5) * mean_d
    # Harvey-Leybourne-Newbold small-sample adjustment.
    harvey_adj = ((T + 1 - 2 * h + h * (h - 1) / T) / T) ** 0.5
    DM_stat = harvey_adj * DM_stat
    # Two-sided p-value from a t distribution with T-1 degrees of freedom.
    p_value = 2 * t.cdf(-abs(DM_stat), df=T - 1)

    dm_return = collections.namedtuple('dm_return', 'DM p_value')
    return dm_return(DM=DM_stat, p_value=p_value)
# ---- DM test for Consumption ----
# Pairwise Diebold-Mariano tests over the 6-quarter test window:
# baseline vs ARIMA, baseline vs LSTM, ARIMA vs LSTM.
dm_consumption_base_ARIMA = diebold_mariano_test(consumption.consumption[-6:], forecast_values_consumption_baseline.squeeze(), forecast_values_consumption_ARIMA)
dm_consumption_base_LSTM = diebold_mariano_test(consumption.consumption[-6:], forecast_values_consumption_baseline.squeeze(), forecasted_test_consumption_LSTM)
dm_consumption_ARIMA_LSTM = diebold_mariano_test(consumption.consumption[-6:], forecast_values_consumption_ARIMA.squeeze(), forecasted_test_consumption_LSTM)
print('Baseline and ARIMA:')
print(dm_consumption_base_ARIMA)
print('Baseline and LSTM:')
print(dm_consumption_base_LSTM)
print('ARIMA and LSTM:')
print(dm_consumption_ARIMA_LSTM)
# ---- DM test for Imports ----
# Renamed from dm_consumption_* so the variables match the series being tested.
dm_imports_base_ARIMA = diebold_mariano_test(imports.imports[-6:], forecast_values_imports_baseline.squeeze(), forecast_values_imports_ARIMA)
dm_imports_base_LSTM = diebold_mariano_test(imports.imports[-6:], forecast_values_imports_baseline.squeeze(), forecasted_test_imports_LSTM)
dm_imports_ARIMA_LSTM = diebold_mariano_test(imports.imports[-6:], forecast_values_imports_ARIMA.squeeze(), forecasted_test_imports_LSTM)
print('Baseline and ARIMA:')
print(dm_imports_base_ARIMA)
print('Baseline and LSTM:')
print(dm_imports_base_LSTM)
print('ARIMA and LSTM:')
print(dm_imports_ARIMA_LSTM)
# ---- DM test for Exports ----
# Renamed from dm_consumption_* so the variables match the series being tested.
dm_exports_base_ARIMA = diebold_mariano_test(exports.exports[-6:], forecast_values_exports_baseline.squeeze(), forecast_values_exports_ARIMA)
dm_exports_base_LSTM = diebold_mariano_test(exports.exports[-6:], forecast_values_exports_baseline.squeeze(), forecasted_test_exports_LSTM)
dm_exports_ARIMA_LSTM = diebold_mariano_test(exports.exports[-6:], forecast_values_exports_ARIMA.squeeze(), forecasted_test_exports_LSTM)
print('Baseline and ARIMA:')
print(dm_exports_base_ARIMA)
print('Baseline and LSTM:')
print(dm_exports_base_LSTM)
print('ARIMA and LSTM:')
print(dm_exports_ARIMA_LSTM)
# ---- DM test for Investments ----
# Renamed from dm_consumption_* so the variables match the series being tested.
dm_investments_base_ARIMA = diebold_mariano_test(investments.investments[-6:], forecast_values_investments_baseline.squeeze(), forecast_values_investments_ARIMA)
dm_investments_base_LSTM = diebold_mariano_test(investments.investments[-6:], forecast_values_investments_baseline.squeeze(), forecasted_test_investments_LSTM)
dm_investments_ARIMA_LSTM = diebold_mariano_test(investments.investments[-6:], forecast_values_investments_ARIMA.squeeze(), forecasted_test_investments_LSTM)
print('Baseline and ARIMA:')
print(dm_investments_base_ARIMA)
print('Baseline and LSTM:')
print(dm_investments_base_LSTM)
print('ARIMA and LSTM:')
print(dm_investments_ARIMA_LSTM)
# ---- DM test for Government Expenditures ----
# Renamed from dm_consumption_* so the variables match the series being tested.
dm_gov_base_ARIMA = diebold_mariano_test(government_expenditure.government_expenditure[-6:], forecast_values_gov_baseline.squeeze(), forecast_values_government_expenditure_ARIMA)
dm_gov_base_LSTM = diebold_mariano_test(government_expenditure.government_expenditure[-6:], forecast_values_gov_baseline.squeeze(), forecasted_test_gov_LSTM)
dm_gov_ARIMA_LSTM = diebold_mariano_test(government_expenditure.government_expenditure[-6:], forecast_values_government_expenditure_ARIMA.squeeze(), forecasted_test_gov_LSTM)
print('Baseline and ARIMA:')
print(dm_gov_base_ARIMA)
print('Baseline and LSTM:')
print(dm_gov_base_LSTM)
print('ARIMA and LSTM:')
print(dm_gov_ARIMA_LSTM)
# ---- DM test for GDP ----
# NOTE(review): the first two tests reuse forecast_values_gov_baseline (the
# government-expenditure baseline) rather than a GDP-specific baseline -- this
# looks like a copy-paste slip; confirm whether a forecast_values_gdp_baseline
# exists earlier in the file and is intended here.
dm_gdp_base_ARIMA = diebold_mariano_test(gdp.gdp[-6:], forecast_values_gov_baseline.squeeze(), forecast_values_gdp_ARIMA)
dm_gdp_base_LSTM = diebold_mariano_test(gdp.gdp[-6:], forecast_values_gov_baseline.squeeze(), forecasted_test_gdp_LSTM)
dm_gdp_ARIMA_LSTM = diebold_mariano_test(gdp.gdp[-6:], forecast_values_gdp_ARIMA.squeeze(), forecasted_test_gdp_LSTM)
print('Baseline and ARIMA:')
print(dm_gdp_base_ARIMA)
print('Baseline and LSTM:')
print(dm_gdp_base_LSTM)
print('ARIMA and LSTM:')
print(dm_gdp_ARIMA_LSTM)
# The two lines below are Jupyter/Colab cell magic plus a shell command, not
# Python -- left in the notebook they export it to HTML, but in a .py file they
# are a SyntaxError. Commented out so the script stays importable; run them in
# a notebook cell (or `jupyter nbconvert` from a shell) to produce the HTML.
# %%shell
# jupyter nbconvert --to html /content/MasterThesis.ipynb